#include<iostream>
#include <immintrin.h>

using namespace std;

// Demo: multiply the first eight elements of two float arrays with a single
// 256-bit AVX multiply and print the eight products on one line.
int main()
{
    // A __m256 register holds eight packed single-precision floats.
    constexpr int kLanes = 8;

    // Each buffer has room for 16 floats, but only the first kLanes entries
    // are loaded, multiplied, and printed below.
    alignas(32) float lhs[16] = {1,2,3,4,5,6,7,8,2,2,2,2,2,2,2,2};
    alignas(32) float rhs[16] = {8,7,6,5,4,3,2,1,8,7,6,5,4,3,2,1};
    alignas(32) float product[16];

    // The unaligned load/store variants are used as the more conservative
    // choice; they do not require the pointer to be 32-byte aligned.
    const __m256 lhs_vec = _mm256_loadu_ps(lhs);
    const __m256 rhs_vec = _mm256_loadu_ps(rhs);
    _mm256_storeu_ps(product, _mm256_mul_ps(lhs_vec, rhs_vec));

    // Emit each product followed by a single space, then end the line.
    for (int lane = 0; lane != kLanes; ++lane) {
        std::cout << product[lane] << " ";
    }
    std::cout << "\n";
    return 0;
}